true

Spatial Interpolation: meuse

Data

library(sp)
library(gstat)
library(sf)
library(mapview)
library(tidyverse)
data(meuse)
head(meuse)
##        x      y cadmium copper lead zinc  elev       dist   om ffreq soil lime
## 1 181072 333611    11.7     85  299 1022 7.909 0.00135803 13.6     1    1    1
## 2 181025 333558     8.6     81  277 1141 6.983 0.01222430 14.0     1    1    1
## 3 181165 333537     6.5     68  199  640 7.800 0.10302900 13.0     1    1    1
## 4 181298 333484     2.6     81  116  257 7.655 0.19009400  8.0     1    2    0
## 5 181307 333330     2.8     48  117  269 7.480 0.27709000  8.7     1    2    0
## 6 181390 333260     3.0     61  137  281 7.791 0.36406700  7.8     1    2    0
##   landuse dist.m
## 1      Ah     50
## 2      Ah     30
## 3      Ah    150
## 4      Ga    270
## 5      Ah    380
## 6      Ga    470
ggplot(data = meuse) + geom_point(aes(x, y))

data(meuse.grid)
head(meuse.grid)
##        x      y part.a part.b      dist soil ffreq
## 1 181180 333740      1      0 0.0000000    1     1
## 2 181140 333700      1      0 0.0000000    1     1
## 3 181180 333700      1      0 0.0122243    1     1
## 4 181220 333700      1      0 0.0434678    1     1
## 5 181100 333660      1      0 0.0000000    1     1
## 6 181140 333660      1      0 0.0122243    1     1
ggplot(data = meuse.grid) + geom_point(aes(x, y))

Summary Statistics

summary(meuse)
##        x                y             cadmium           copper      
##  Min.   :178605   Min.   :329714   Min.   : 0.200   Min.   : 14.00  
##  1st Qu.:179371   1st Qu.:330762   1st Qu.: 0.800   1st Qu.: 23.00  
##  Median :179991   Median :331633   Median : 2.100   Median : 31.00  
##  Mean   :180005   Mean   :331635   Mean   : 3.246   Mean   : 40.32  
##  3rd Qu.:180630   3rd Qu.:332463   3rd Qu.: 3.850   3rd Qu.: 49.50  
##  Max.   :181390   Max.   :333611   Max.   :18.100   Max.   :128.00  
##                                                                     
##       lead            zinc             elev             dist        
##  Min.   : 37.0   Min.   : 113.0   Min.   : 5.180   Min.   :0.00000  
##  1st Qu.: 72.5   1st Qu.: 198.0   1st Qu.: 7.546   1st Qu.:0.07569  
##  Median :123.0   Median : 326.0   Median : 8.180   Median :0.21184  
##  Mean   :153.4   Mean   : 469.7   Mean   : 8.165   Mean   :0.24002  
##  3rd Qu.:207.0   3rd Qu.: 674.5   3rd Qu.: 8.955   3rd Qu.:0.36407  
##  Max.   :654.0   Max.   :1839.0   Max.   :10.520   Max.   :0.88039  
##                                                                     
##        om         ffreq  soil   lime       landuse       dist.m      
##  Min.   : 1.000   1:84   1:97   0:111   W      :50   Min.   :  10.0  
##  1st Qu.: 5.300   2:48   2:46   1: 44   Ah     :39   1st Qu.:  80.0  
##  Median : 6.900   3:23   3:12           Am     :22   Median : 270.0  
##  Mean   : 7.478                         Fw     :10   Mean   : 290.3  
##  3rd Qu.: 9.000                         Ab     : 8   3rd Qu.: 450.0  
##  Max.   :17.000                         (Other):25   Max.   :1000.0  
##  NA's   :2                              NA's   : 1

Bubble plot

ggplot(data = meuse) + geom_point(aes(x, y, size=zinc))

Spatial Data Wrangling

class(meuse)
## [1] "data.frame"
class(meuse.grid)
## [1] "data.frame"

Convert the meuse and meuse.grid data frames to sf objects using the st_as_sf()

meuse2 <- st_as_sf(meuse, coords = c("x", "y"), crs = 28992)
meuse.grid2 <- st_as_sf(meuse.grid, coords = c("x", "y"),
                       crs = 28992)

Recheck class type

class(meuse2)
## [1] "sf"         "data.frame"
class(meuse.grid2)
## [1] "sf"         "data.frame"
head(meuse2)
## Simple feature collection with 6 features and 12 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 181025 ymin: 333260 xmax: 181390 ymax: 333611
## Projected CRS: Amersfoort / RD New
##   cadmium copper lead zinc  elev       dist   om ffreq soil lime landuse dist.m
## 1    11.7     85  299 1022 7.909 0.00135803 13.6     1    1    1      Ah     50
## 2     8.6     81  277 1141 6.983 0.01222430 14.0     1    1    1      Ah     30
## 3     6.5     68  199  640 7.800 0.10302900 13.0     1    1    1      Ah    150
## 4     2.6     81  116  257 7.655 0.19009400  8.0     1    2    0      Ga    270
## 5     2.8     48  117  269 7.480 0.27709000  8.7     1    2    0      Ah    380
## 6     3.0     61  137  281 7.791 0.36406700  7.8     1    2    0      Ga    470
##                geometry
## 1 POINT (181072 333611)
## 2 POINT (181025 333558)
## 3 POINT (181165 333537)
## 4 POINT (181298 333484)
## 5 POINT (181307 333330)
## 6 POINT (181390 333260)
head(meuse.grid2)
## Simple feature collection with 6 features and 5 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 181100 ymin: 333660 xmax: 181220 ymax: 333740
## Projected CRS: Amersfoort / RD New
##   part.a part.b      dist soil ffreq              geometry
## 1      1      0 0.0000000    1     1 POINT (181180 333740)
## 2      1      0 0.0000000    1     1 POINT (181140 333700)
## 3      1      0 0.0122243    1     1 POINT (181180 333700)
## 4      1      0 0.0434678    1     1 POINT (181220 333700)
## 5      1      0 0.0000000    1     1 POINT (181100 333660)
## 6      1      0 0.0122243    1     1 POINT (181140 333660)

Interactive maps

mapview(meuse2, zcol = "zinc",  map.types = "CartoDB.Voyager")
mapview(meuse.grid2,  map.types = "CartoDB.Voyager")

EDA

ggplot(meuse, aes(x=dist, y=zinc)) + geom_point()

ggplot(meuse, aes(x=dist, y=log(zinc))) + geom_point()

ggplot(meuse, aes(x=sqrt(dist), y=log(zinc))) + geom_point()

library(GGally)
ggpairs(meuse)

ggpairs(meuse[,3:9])
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_density()`).

Simple Linear Regression Model

lm <- lm(log(zinc)~sqrt(dist), meuse)
meuse$residuals <- residuals(lm)
meuse$fitted <- fitted(lm) 
meuse$fitted2 <- predict(lm, meuse) 
meuse$fitted.s <- predict(lm, meuse) - mean(predict(lm, meuse))
head(meuse)
##        x      y cadmium copper lead zinc  elev       dist   om ffreq soil lime
## 1 181072 333611    11.7     85  299 1022 7.909 0.00135803 13.6     1    1    1
## 2 181025 333558     8.6     81  277 1141 6.983 0.01222430 14.0     1    1    1
## 3 181165 333537     6.5     68  199  640 7.800 0.10302900 13.0     1    1    1
## 4 181298 333484     2.6     81  116  257 7.655 0.19009400  8.0     1    2    0
## 5 181307 333330     2.8     48  117  269 7.480 0.27709000  8.7     1    2    0
## 6 181390 333260     3.0     61  137  281 7.791 0.36406700  7.8     1    2    0
##   landuse dist.m   residuals   fitted  fitted2     fitted.s
## 1      Ah     50  0.02907908 6.900438 6.900438  1.014661840
## 2      Ah     30  0.32712956 6.712531 6.712531  0.826754936
## 3      Ah    150  0.28533439 6.176134 6.176134  0.290357936
## 4      Ga    270 -0.33385786 5.882934 5.882934 -0.002841905
## 5      Ah    380 -0.05778586 5.652497 5.652497 -0.233278608
## 6      Ga    470  0.18211082 5.456244 5.456244 -0.429532005
library(viridis)
## Loading required package: viridisLite
ggplot(meuse, aes(x=x, y=y, col=fitted))+geom_point()+scale_color_viridis()

ggplot(meuse, aes(x=x, y=y, col=residuals))+geom_point()+scale_color_viridis()

ggplot(meuse, aes(x=x, y=y, col=fitted.s))+geom_point()+scale_color_viridis()

Inverse Distance Interpolation

library(stars) |> suppressPackageStartupMessages()
library(gstat)
idw_result <- idw(zinc~1, meuse2, meuse.grid2)
## [inverse distance weighted interpolation]
idw_result
## Simple feature collection with 3103 features and 2 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 178460 ymin: 329620 xmax: 181540 ymax: 333740
## Projected CRS: Amersfoort / RD New
## First 10 features:
##    var1.pred var1.var              geometry
## 1   633.6864       NA POINT (181180 333740)
## 2   712.5450       NA POINT (181140 333700)
## 3   654.1617       NA POINT (181180 333700)
## 4   604.4422       NA POINT (181220 333700)
## 5   857.2558       NA POINT (181100 333660)
## 6   755.5061       NA POINT (181140 333660)
## 7   667.7526       NA POINT (181180 333660)
## 8   604.4254       NA POINT (181220 333660)
## 9  1003.8711       NA POINT (181060 333620)
## 10  945.0017       NA POINT (181100 333620)
# Convert IDW output to a data frame
idw_df <- as.data.frame(idw_result)
head(idw_df)
##   var1.pred var1.var              geometry
## 1  633.6864       NA POINT (181180 333740)
## 2  712.5450       NA POINT (181140 333700)
## 3  654.1617       NA POINT (181180 333700)
## 4  604.4422       NA POINT (181220 333700)
## 5  857.2558       NA POINT (181100 333660)
## 6  755.5061       NA POINT (181140 333660)
colnames(idw_df) <- c("x", "y", "zinc_pred")
head(idw_df)
##          x  y             zinc_pred
## 1 633.6864 NA POINT (181180 333740)
## 2 712.5450 NA POINT (181140 333700)
## 3 654.1617 NA POINT (181180 333700)
## 4 604.4422 NA POINT (181220 333700)
## 5 857.2558 NA POINT (181100 333660)
## 6 755.5061 NA POINT (181140 333660)

Visualize IDW values

ggplot(data = meuse2) + geom_sf(data = idw_result, aes(color = var1.pred)) +
  geom_sf() +
  scale_color_viridis() + theme_bw()

Spatial Kriging

Lagged Scatter plots

hscat(log(zinc)~1, meuse2, (0:9)*100)

Variogram Cloud

vc <- variogram(log(zinc) ~ 1, meuse2, cloud = TRUE)
plot(vc)

Binned Variogram/ Sample Variogram

v <- variogram(log(zinc) ~ 1, data = meuse2)
plot(v)

List of available models for variograms

vgm()
##    short                                      long
## 1    Nug                              Nug (nugget)
## 2    Exp                         Exp (exponential)
## 3    Sph                           Sph (spherical)
## 4    Gau                            Gau (gaussian)
## 5    Exc        Exclass (Exponential class/stable)
## 6    Mat                              Mat (Matern)
## 7    Ste Mat (Matern, M. Stein's parameterization)
## 8    Cir                            Cir (circular)
## 9    Lin                              Lin (linear)
## 10   Bes                              Bes (bessel)
## 11   Pen                      Pen (pentaspherical)
## 12   Per                            Per (periodic)
## 13   Wav                                Wav (wave)
## 14   Hol                                Hol (hole)
## 15   Log                         Log (logarithmic)
## 16   Pow                               Pow (power)
## 17   Spl                              Spl (spline)
## 18   Leg                            Leg (Legendre)
## 19   Err                   Err (Measurement error)
## 20   Int                           Int (Intercept)
show.vgms(par.strip.text = list(cex = 0.75))

Fitting Variogram Models

Trial 1

Assess our initial model

vg.fit1 <- vgm(psill = 0.5, model = "Sph",
                range = 900, nugget = 0.1)
vg.fit1 
##   model psill range
## 1   Nug   0.1     0
## 2   Sph   0.5   900
plot(v, vg.fit1 , cutoff = 1000, cex = 1.5)

Fit a variogram providing the sample variogram

fv <- fit.variogram(object = v,
                    model = vgm(psill = 0.5, model = "Sph",
                                range = 900, nugget = 0.1))
fv
##   model      psill    range
## 1   Nug 0.05066017   0.0000
## 2   Sph 0.59060556 897.0044
attr(fv, "SSErr")
## [1] 9.011194e-06
plot(v, fv, cex = 1.5)

Trial 2

fv2 <- fit.variogram(object = v,
                    model = vgm(psill = 0.6, model = "Sph",range = 900, nugget = 0.1))
fv2
##   model      psill    range
## 1   Nug 0.05066522   0.0000
## 2   Sph 0.59061054 897.0412
attr(fv2, "SSErr")
## [1] 9.011195e-06
plot(v, fv2, cex = 1.5)

Trial 3

fv3 <- fit.variogram(object = v,
                    model = vgm(psill = 1, model = "Exp",range = 800, nugget = 0.1))
fv3
##   model     psill    range
## 1   Nug 0.0000000   0.0000
## 2   Exp 0.7186519 449.7572
attr(fv3, "SSErr")
## [1] 1.628328e-05
plot(v, fv3, cex = 1.5)

If the process is isotropic we can directly go for kriging

library(ggplot2)
library(viridis)

k <- gstat(formula = log(zinc) ~ 1, data = meuse2, model = fv)
kpred <- predict(k, meuse.grid2)
## [using ordinary kriging]
head(kpred)
## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 181100 ymin: 333660 xmax: 181220 ymax: 333740
## Projected CRS: Amersfoort / RD New
##   var1.pred  var1.var              geometry
## 1  6.499619 0.3198083 POINT (181180 333740)
## 2  6.622352 0.2520197 POINT (181140 333700)
## 3  6.505162 0.2729850 POINT (181180 333700)
## 4  6.387586 0.2955288 POINT (181220 333700)
## 5  6.764491 0.1779405 POINT (181100 333660)
## 6  6.635511 0.2022032 POINT (181140 333660)

Visualisation of Predictions

ggplot() + geom_sf(data = kpred, aes(color = var1.pred)) +
  geom_sf(data = meuse2) +
  scale_color_viridis(name = "log(zinc)") + theme_bw()

ggplot() + geom_sf(data = kpred, aes(color = var1.var)) +
  geom_sf(data = meuse2) +
  scale_color_viridis(name = "variance") + theme_bw()

Detecting Anisotropy

vg.dir <- variogram(log(zinc)~1, alpha = c(0, 45, 90, 135), meuse2)
plot(vg.dir)

Fit Anisotropic Variogram Models

The function call vgm(psill=0.59, model = “Sph”, range = 1200, nugget = 0.05, anis = c(45, 0.4)) in R creates a variogram model using the vgm() function from the gstat package. Here’s what each parameter means:

  • psill = 0.59: This is the partial sill, representing the difference between the sill (total variance) and the nugget effect. It defines the maximum semivariance minus the nugget.

  • model = “Sph”: The model type is Spherical, one of the commonly used variogram models. It describes how spatial correlation decreases with distance.

  • range = 1200: This is the range, the distance at which the spatial correlation drops to near zero (or 95% of the sill for the spherical model). Beyond this distance, observations are considered uncorrelated.

  • nugget = 0.05: The nugget effect, representing spatial variability at very small scales or measurement errors. It accounts for discontinuity at very short distances.

  • anis = c(45, 0.4): Specifies anisotropy, meaning the spatial correlation differs by direction.

  • 45 indicates the direction (angle in degrees from the x-axis) where the correlation is strongest.

  • 0.4 is the anisotropy ratio, meaning correlation decays faster in the perpendicular direction. A value of 0.4 means that in the perpendicular direction, the range is only 40% of the main direction’s range.

fit.ani <- vgm(psill=0.59, model = "Sph", range = 1200, nugget = 0.05, anis = c(45, 0.4))
plot(vg.dir, fit.ani)

This variogram model describes spatial dependence using a spherical variogram with anisotropy. The correlation extends up to 1200 units in the primary direction (45°), but only 480 units (1200 × 0.4) in the perpendicular direction. There is a small nugget effect (0.05), indicating slight short-scale variation or measurement noise.

Variogram Maps

Another way to detect directional dependence

vg.map <- variogram(log(zinc)~1, meuse2, map=T, cutoff=1000, width=100)
plot(vg.map, threshold=5)

Explanation of the Code: vg.map <- variogram(log(zinc) ~ 1, meuse, map = TRUE, cutoff = 1000, width = 100)

This code computes and visualizes the variogram map of the log-transformed zinc concentrations in the Meuse dataset.

  1. This function calculates a directional semivariogram for the spatial data.
  • variogram(log(zinc) ~ 1, meuse, …)

    Computes the experimental semivariogram for log-transformed zinc concentrations. The ~ 1 formula means it calculates the variogram without considering covariates (i.e., only based on spatial distances).
  • map = TRUE

    Instead of returning a traditional one-dimensional variogram, this creates a spatial variogram map, which helps identify anisotropy (directional dependence of spatial variation).

  • cutoff = 1000

    Defines the maximum lag distance (1,000 meters). Pairs of points separated by distances greater than this are ignored.

  • width = 100

    Sets the bin width for grouping distances. Distance intervals of 100 meters are used when computing the variogram.

  1. Breakdown of plot(vg.map, threshold = 5) This function plots the variogram map.
  • plot(vg.map, …)

    Plots the spatial variogram map, where each pixel represents a directional semivariance value for a given spatial lag.

  • threshold = 5

    Sets a threshold to filter high semivariance values to make the plot more interpretable.

Model residuals obtain from log(zinc)~sqrt(dist)

vgreg.est <- variogram(log(zinc)~sqrt(dist), meuse2)
plot(vgreg.est)

vgreg.fit <- fit.variogram(vgreg.est, vgm(0.2, "Sph", 800, 0.05))
vgreg.fit
##   model      psill    range
## 1   Nug 0.07978675   0.0000
## 2   Sph 0.14902954 871.8744
plot(vgreg.est, vgreg.fit, cex = 1.5)

vgreg.dir <- variogram(log(zinc)~sqrt(dist), meuse2, alpha=c(0, 45, 90, 135))
plot(vgreg.dir)

plot(vgreg.dir, vgreg.fit, cex = 1.5)

The directional dependence is much less obvious in this case.

Another way to show this is variogram maps

vgreg.map <- variogram(log(zinc)~sqrt(dist), meuse2, map=T, cutoff=1000, width=100)
plot(vgreg.map, threshold=5)

Simple Kriging: Without Handling Anisotropy

Note: This is an illustration of the codes. In the current implementation, anisotropy is ignored, meaning the model assumes the spatial correlation is the same in all directions. If anisotropy exists (i.e., the spatial dependence varies by direction), we need to incorporate it explicitly in the variogram model.

  1. Compute Variograms & Fit Models
zinc_mean <- mean(log(meuse$zinc)) # Required for SK
v_sk_est <- variogram(log(zinc) ~ 1, meuse2, cloud=F)
v_sk_est
##     np       dist     gamma dir.hor dir.ver   id
## 1   57   79.29244 0.1234479       0       0 var1
## 2  299  163.97367 0.2162185       0       0 var1
## 3  419  267.36483 0.3027859       0       0 var1
## 4  457  372.73542 0.4121448       0       0 var1
## 5  547  478.47670 0.4634128       0       0 var1
## 6  533  585.34058 0.5646933       0       0 var1
## 7  574  693.14526 0.5689683       0       0 var1
## 8  564  796.18365 0.6186769       0       0 var1
## 9  589  903.14650 0.6471479       0       0 var1
## 10 543 1011.29177 0.6915705       0       0 var1
## 11 500 1117.86235 0.7033984       0       0 var1
## 12 477 1221.32810 0.6038770       0       0 var1
## 13 452 1329.16407 0.6517158       0       0 var1
## 14 457 1437.25620 0.5665318       0       0 var1
## 15 415 1543.20248 0.5748227       0       0 var1
plot(v_sk_est, main="Sample Variogram")

v_sk_fit <- fit.variogram(v_sk_est, model = vgm(1, "Sph", 900, 0.1))
v_sk_fit
##   model      psill    range
## 1   Nug 0.05066243   0.0000
## 2   Sph 0.59060780 897.0209
plot(v_sk_est, v_sk_fit)

attr(v_sk_fit, "SSErr")
## [1] 9.011194e-06
  1. Perform Kriging Interpolation
krige_sk <- krige(log(zinc) ~ 1, meuse2, meuse.grid2, model = v_sk_fit, beta = zinc_mean)
## [using simple kriging]
head(krige_sk)
## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 181100 ymin: 333660 xmax: 181220 ymax: 333740
## Projected CRS: Amersfoort / RD New
##   var1.pred  var1.var              geometry
## 1  6.447758 0.3160027 POINT (181180 333740)
## 2  6.585255 0.2500732 POINT (181140 333700)
## 3  6.465116 0.2707163 POINT (181180 333700)
## 4  6.343498 0.2927786 POINT (181220 333700)
## 5  6.741960 0.1772242 POINT (181100 333660)
## 6  6.610664 0.2013310 POINT (181140 333660)
  1. Visualize the Kriging Results
ggplot(data = meuse2) + geom_sf(data = krige_sk, aes(color = var1.pred)) +
  geom_sf() +
  scale_color_viridis() + theme_bw()

ggplot(data = meuse2) + geom_sf(data = krige_sk, aes(color = var1.var)) +
  geom_sf() +
  scale_color_viridis() + theme_bw()

Ordinary Kriging: Without Handling Anisotropy

  1. Compute Variograms & Fit Models
v_ok <- variogram(log(zinc) ~ 1, meuse2)
v_ok_fit <- fit.variogram(v_ok, model = vgm(1, "Sph", 900, 0.1))
  1. Perform Kriging Interpolation
krige_ok <- krige(log(zinc) ~ 1, meuse2, meuse.grid2, model = v_ok_fit)
## [using ordinary kriging]
head(krige_ok)
## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 181100 ymin: 333660 xmax: 181220 ymax: 333740
## Projected CRS: Amersfoort / RD New
##   var1.pred  var1.var              geometry
## 1  6.499624 0.3198084 POINT (181180 333740)
## 2  6.622356 0.2520205 POINT (181140 333700)
## 3  6.505166 0.2729855 POINT (181180 333700)
## 4  6.387590 0.2955290 POINT (181220 333700)
## 5  6.764492 0.1779424 POINT (181100 333660)
## 6  6.635513 0.2022045 POINT (181140 333660)
  1. Visualize the Kriging Results
ggplot(data = meuse2) + geom_sf(data = krige_ok, aes(color = var1.pred)) +
  geom_sf() +
  scale_color_viridis() + theme_bw()

ggplot(data = meuse2) + geom_sf(data = krige_ok, aes(color = var1.var)) +
  geom_sf() +
  scale_color_viridis() + theme_bw()

Universal Kriging

  1. Compute Variograms & Fit Models
v_uk <- variogram(log(zinc) ~ sqrt(dist), meuse2)
v_uk_fit <- fit.variogram(v_uk, model = vgm(1, "Sph", 900, 0.1))
  1. Perform Kriging Interpolation
krige_uk <- krige(log(zinc) ~ sqrt(dist), meuse2, meuse.grid2, model = v_uk_fit)
## [using universal kriging]
head(krige_uk)
## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 181100 ymin: 333660 xmax: 181220 ymax: 333740
## Projected CRS: Amersfoort / RD New
##   var1.pred  var1.var              geometry
## 1  7.071054 0.1683803 POINT (181180 333740)
## 2  7.088269 0.1518497 POINT (181140 333700)
## 3  6.785167 0.1537665 POINT (181180 333700)
## 4  6.512936 0.1576136 POINT (181220 333700)
## 5  7.104999 0.1343480 POINT (181100 333660)
## 6  6.804171 0.1368075 POINT (181140 333660)
  1. Visualize the Kriging Results
ggplot(data = meuse2) + geom_sf(data = krige_uk, aes(color = var1.pred)) +
  geom_sf() +
  scale_color_viridis() + theme_bw()

ggplot(data = meuse2) + geom_sf(data = krige_uk, aes(color = var1.var)) +
  geom_sf() +
  scale_color_viridis() + theme_bw()

Simple Kriging: Handling Anisotropy

1. Step 1: Compute Directional Variograms

library(gstat)

# Compute directional variogram with different angles (0, 45, 90, 135 degrees)
v_aniso <- variogram(log(zinc) ~ 1, meuse2, alpha = c(0, 45, 90, 135))

# Plot the directional variograms
plot(v_aniso)

2. Fit Anisotropic Variogram Model

  • psill = 0.59:

The partial sill or psill represents the variance of the data (the overall range of spatial variation). In this case, it indicates that the variance of the spatial process is 0.59 after subtracting the nugget (if any).

  • model = “Sph”:

This specifies the model of spatial correlation. “Sph” stands for the spherical model of variograms, which assumes the correlation increases with distance until it reaches a maximum (the range), after which it flattens out. Other models include “Exp” (exponential) and “Gau” (Gaussian), each representing different types of spatial correlation decay.

  • range = 1200:

The range refers to the distance at which the spatial correlation becomes negligible (i.e., it reaches its sill). After this distance, there is no longer a significant correlation between points. Here, the range is set to 1200 units, meaning that beyond 1200 units, the spatial dependence is assumed to be zero.

  • nugget = 0.05:

The nugget represents the variance at very small distances, or the “micro-scale” variability that isn’t captured by the spatial model. This is usually attributed to measurement errors or small-scale unmodeled variation.

In this case, the nugget is 0.05, meaning there’s a small amount of unexplained variability at the smallest distances. anis = c(45, 0.4):

This specifies the anisotropy in the variogram model:

45: The angle in degrees that defines the major axis of the anisotropy. This means the range in this direction (45°) is different from the range in the perpendicular direction (90°).

0.4: The anisotropy ratio (major range / minor range). This means the range in the major direction (45°) is 0.4 times the range in the minor direction (perpendicular to 45°). In other words, the spatial correlation is stronger in the direction of the major axis (45°) and weaker in the perpendicular direction.

  • Summary:

Partial Sill (0.59): The variance of the spatial process after accounting for the nugget. Spherical Model (“Sph”): Specifies the type of spatial correlation model used. Range (1200): The distance at which spatial correlation becomes negligible. Nugget (0.05): The small-scale variability (or measurement error) at very short distances. Anisotropy: A directional dependence in the spatial correlation: 45° direction has a range of 1200 units. The minor range (perpendicular to 45°) is 1200 / 0.4 = 3000 units.

# Fit anisotropic variogram model with a specified range and anisotropy ratio
fit.ani <- vgm(psill=0.59, model = "Sph", range = 1200, nugget = 0.05, anis = c(45, 0.4))
plot(vg.dir, fit.ani)

v_aniso_fit <- fit.variogram(v_aniso, 
                              model = fit.ani)
v_aniso_fit
##   model      psill    range ang1 anis1
## 1   Nug 0.05892577    0.000    0   1.0
## 2   Sph 0.58465419 1742.857   45   0.4
attr(v_aniso_fit, "SSErr")
## [1] 0.0005215035

3. Kriging interpolation

krige_aniso_sk <- krige(log(zinc) ~ 1, meuse2, meuse.grid2, model = v_aniso_fit, beta = mean(log(meuse$zinc)))
## [using simple kriging]
ggplot(data = meuse2) + geom_sf(data = krige_aniso_sk, aes(color = var1.pred)) +
  geom_sf() +
  scale_color_viridis() + theme_bw()

ggplot(data = meuse2) + geom_sf(data = krige_aniso_sk, aes(color = var1.var)) +
  geom_sf() +
  scale_color_viridis() + theme_bw()

Cross validation Residuals

  1. Split data into traing and test
set.seed(42)  # For reproducibility

# Sample indices for training set (70% of the data)
train_indices <- sample(1:nrow(meuse2), size = 0.7 * nrow(meuse2))

# Create training set and test set
train_data <- meuse2[train_indices, ]
test_data <- meuse2[-train_indices, ]

# Check the first few rows of both sets
head(train_data)
## Simple feature collection with 6 features and 12 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 178875 ymin: 329714 xmax: 180270 ymax: 331955
## Projected CRS: Amersfoort / RD New
##     cadmium copper lead zinc  elev      dist   om ffreq soil lime landuse
## 50      1.7     24  112  282 9.420 0.5542890  4.5     1    2    0      Bw
## 66      7.4     72  181  801 7.360 0.0122243 15.2     1    1    1       W
## 158     2.1     31  119  342 8.429 0.2770900  6.5     3    1    0      Ah
## 83      3.1     39  237  593 6.320 0.2009760  7.0     1    1    1      Ah
## 151     3.8     39  179  612 7.300 0.0537723  8.8     3    1    0       W
## 128     0.2     25   94  253 8.779 0.1030240  8.1     2    1    1      Tv
##     dist.m              geometry
## 50     660 POINT (180270 331707)
## 66      20 POINT (179334 331366)
## 158    350 POINT (178875 330311)
## 83     260 POINT (179110 330758)
## 151     80 POINT (179245 329714)
## 128     70 POINT (179642 331955)
head(test_data)
## Simple feature collection with 6 features and 12 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 180625 ymin: 332664 xmax: 181191 ymax: 333370
## Projected CRS: Amersfoort / RD New
##    cadmium copper lead zinc  elev      dist   om ffreq soil lime landuse dist.m
## 7      3.2     31  132  346 8.217 0.1900940  9.2     1    2    0      Ah    240
## 11     1.4     25   86  189 9.015 0.3151160  6.4     1    2    0      Fh    400
## 12     1.8     25   97  251 9.073 0.2281230  9.0     1    1    0      Ag    300
## 17     7.0     74  133  606 7.160 0.0122243 16.0     1    1    1       W     10
## 19     8.7     69  207  735 7.020 0.0000000 13.7     1    1    1       W     10
## 23     2.9     35  110  343 8.830 0.1139320  7.2     1    1    1      Ag    160
##                 geometry
## 7  POINT (181165 333370)
## 11 POINT (181191 333115)
## 12 POINT (181032 333031)
## 17 POINT (180763 333104)
## 19 POINT (180625 332847)
## 23 POINT (180704 332664)
  1. Fit the model using the training data
v_uk <- variogram(log(zinc) ~ sqrt(dist), train_data )
v_uk_fit <- fit.variogram(v_uk, model = vgm(1, "Sph", 900, 0.1))
krige_uk <- krige(log(zinc) ~ sqrt(dist), train_data, test_data, model = v_uk_fit)
## [using universal kriging]
head(krige_uk)
## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 180625 ymin: 332664 xmax: 181191 ymax: 333370
## Projected CRS: Amersfoort / RD New
##    var1.pred  var1.var              geometry
## 7   5.823064 0.1278743 POINT (181165 333370)
## 11  5.373305 0.1255082 POINT (181191 333115)
## 12  5.572201 0.1313608 POINT (181032 333031)
## 17  6.568145 0.1355524 POINT (180763 333104)
## 19  6.901494 0.1330293 POINT (180625 332847)
## 23  6.047125 0.1216261 POINT (180704 332664)
  1. Actual vs predicted
actual <- log(test_data$zinc)
predicted <- log(krige_uk$var1.pred)
dfuk <- data.frame(actual, predicted)
ggplot(dfuk, aes(x=actual, y=predicted)) + geom_point()

  1. Histogram of residuals
dfuk$resid <- dfuk$actual - dfuk$predicted
ggplot(dfuk, aes(x=resid)) + geom_histogram(col="white")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.